offers_path <- "C:/Users/AKhanna/OneDrive - Essential Services Commission/Documents/Transfers-Reporting/transfer_vs_offer_analysis/VEC Elec Offers with CPRG VDO.xlsx"

transfers_path <- "C:/Users/AKhanna/OneDrive - Essential Services Commission/Documents/Transfers-Reporting/transfer_vs_offer_analysis/transfers_raw.rds"

ANALYSIS 1: Retailer vs Victorian Offer Price

## 
## --- CURRENT WEEK PRICING (Lag 0) ---
## # A tibble: 5 × 3
##   price_group               avg_weekly_transfer_rate n_retailer_weeks
##   <fct>                                        <dbl>            <int>
## 1 Much lower (<-10%)                           0.412             2621
## 2 Moderately lower (5-10%)                     0.291             3414
## 3 Near average (±5%)                           0.329            10213
## 4 Moderately higher (5-10%)                    0.341             2007
## 5 Much higher (>10%)                           0.475             1862
## 
## --- 2 WEEKS AGO PRICING (Lag 2w) ---
## # A tibble: 5 × 3
##   price_group               avg_weekly_transfer_rate n_retailer_weeks
##   <fct>                                        <dbl>            <int>
## 1 Much lower (<-10%)                           0.401             2776
## 2 Moderately lower (5-10%)                     0.293             3470
## 3 Near average (±5%)                           0.337             9674
## 4 Moderately higher (5-10%)                    0.354             2032
## 5 Much higher (>10%)                           0.475             1856
## 
## --- 4 WEEKS AGO PRICING (Lag 4w) ---
## # A tibble: 5 × 3
##   price_group               avg_weekly_transfer_rate n_retailer_weeks
##   <fct>                                        <dbl>            <int>
## 1 Much lower (<-10%)                           0.394             2900
## 2 Moderately lower (5-10%)                     0.291             3400
## 3 Near average (±5%)                           0.375             9749
## 4 Moderately higher (5-10%)                    0.319             2001
## 5 Much higher (>10%)                           0.558             1742
## 
## --- 8 WEEKS AGO PRICING (Lag 8w) ---
## # A tibble: 5 × 3
##   price_group               avg_weekly_transfer_rate n_retailer_weeks
##   <fct>                                        <dbl>            <int>
## 1 Much lower (<-10%)                           0.362             3024
## 2 Moderately lower (5-10%)                     0.311             3392
## 3 Near average (±5%)                           0.397             9005
## 4 Moderately higher (5-10%)                    0.295             1896
## 5 Much higher (>10%)                           0.565             1705
## 
## --- PENALTY FOR BEING >10% ABOVE MARKET ---
## Which timeframe shows the biggest impact?
## # A tibble: 4 × 3
##   lag                  difference_from_baseline pct_difference
##   <fct>                                   <dbl>          <dbl>
## 1 4 Weeks Ago (Lag 4w)                    0.183           48.9
## 2 8 Weeks Ago (Lag 8w)                    0.168           42.1
## 3 Current Week (Lag 0)                    0.146           44.2
## 4 2 Weeks Ago (Lag 2w)                    0.138           41.0
## 
## ✓ INSIGHT: The ' 3 ' timeframe shows the strongest effect
##   Retailers >10% above market have 0.18% points higher transfer rate (48.9% more)

ANALYSIS 2: RELATIONSHIP BETWEEN OFFER PRICES AND TRANSFERS

## # A tibble: 20 × 3
##    price_bucket avg_transfer_rate n_observations
##           <dbl>             <dbl>          <int>
##  1          -18             0.354             82
##  2          -16             0.385            455
##  3          -14             0.332            652
##  4          -12             0.532            949
##  5          -10             0.336            911
##  6           -8             0.304           1237
##  7           -6             0.265           1741
##  8           -4             0.260           1216
##  9           -2             0.263           2595
## 10            0             0.466           2931
## 11            2             0.278           1871
## 12            4             0.296           1600
## 13            6             0.269           1067
## 14            8             0.368            686
## 15           10             0.439            635
## 16           12             0.346            297
## 17           14             0.397            284
## 18           16             0.490             74
## 19           18             0.372             41
## 20           20             0.337             74

ANALYSIS 3: REGRESSION MODELS

## === REGRESSION DIAGNOSTICS ===
## Rows in regression_data: 20117
##  price_vs_market    avg_contract_term pct_solar_available pct_with_incentive
##  Min.   :-22.9521   Min.   :0.0000    Min.   :  0.00      Min.   :  0.00    
##  1st Qu.: -6.1545   1st Qu.:0.0000    1st Qu.:100.00      1st Qu.:  0.00    
##  Median : -0.7358   Median :0.0000    Median :100.00      Median : 71.43    
##  Mean   : -0.3691   Mean   :0.1121    Mean   : 98.44      Mean   : 55.59    
##  3rd Qu.:  3.4771   3rd Qu.:0.0000    3rd Qu.:100.00      3rd Qu.:100.00    
##  Max.   : 88.0044   Max.   :1.1509    Max.   :100.00      Max.   :100.00    
##  pct_guaranteed_discount avg_annual_fee     avg_late_fee    
##  Min.   :  0.00          Min.   :   0.00   Min.   : 0.0000  
##  1st Qu.:  0.00          1st Qu.:   0.00   1st Qu.: 0.0000  
##  Median :  0.00          Median :   0.00   Median : 0.0000  
##  Mean   : 20.01          Mean   :  34.82   Mean   : 0.9632  
##  3rd Qu.: 28.57          3rd Qu.:   0.00   3rd Qu.: 0.0000  
##  Max.   :100.00          Max.   :1560.00   Max.   :16.0000  
##  customer_count      
##  Min.   :1.092e-321  
##  1st Qu.:8.528e-320  
##  Median :5.343e-319  
##  Mean   :9.300e-319  
##  3rd Qu.:1.187e-318  
##  Max.   :3.818e-318

LINEAR REGRESSION MODEL

## 
## REGRESSION RESULTS:
## 
## Call:
## lm(formula = total_transfers_out ~ price_vs_market + price_competitiveness_rank + 
##     pct_guaranteed_discount + pct_conditional_discount + avg_contract_term + 
##     pct_solar_available + pct_with_incentive + avg_annual_fee + 
##     avg_late_fee + log_customer_size, data = regression_data)
## 
## Residuals:
##         Min          1Q      Median          3Q         Max 
## -4.659e-321 -1.067e-321 -2.421e-322  6.275e-322  1.670e-320 
## 
## Coefficients:
##                               Estimate  Std. Error t value Pr(>|t|)    
## (Intercept)                 6.946e-319   0.000e+00     Inf   <2e-16 ***
## price_vs_market             9.881e-324   0.000e+00     Inf   <2e-16 ***
## price_competitiveness_rank   0.000e+00   0.000e+00     NaN      NaN    
## pct_guaranteed_discount      0.000e+00   0.000e+00     NaN      NaN    
## pct_conditional_discount   -4.941e-324   0.000e+00    -Inf   <2e-16 ***
## avg_contract_term           6.719e-322   0.000e+00     Inf   <2e-16 ***
## pct_solar_available        -9.881e-324   0.000e+00    -Inf   <2e-16 ***
## pct_with_incentive          4.941e-324   0.000e+00     Inf   <2e-16 ***
## avg_annual_fee               0.000e+00   0.000e+00     NaN      NaN    
## avg_late_fee                3.953e-323   0.000e+00     Inf   <2e-16 ***
## log_customer_size           9.437e-322   0.000e+00     Inf   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0 on 20106 degrees of freedom
## Multiple R-squared:    NaN,  Adjusted R-squared:    NaN 
## F-statistic:   NaN on 10 and 20106 DF,  p-value: NA
##                   Variable    Coefficient Std_Error T_Value P_Value Significant
## 1          price_vs_market  9.881313e-324         0     Inf       0         YES
## 2 pct_conditional_discount -4.940656e-324         0    -Inf       0         YES
## 3        avg_contract_term  6.719293e-322         0     Inf       0         YES
## 4      pct_solar_available -9.881313e-324         0    -Inf       0         YES
## 5       pct_with_incentive  4.940656e-324         0     Inf       0         YES
## 6             avg_late_fee  3.952525e-323         0     Inf       0         YES
## 7        log_customer_size  9.436654e-322         0     Inf       0         YES
##      Direction                               Interpretation
## 1 Positive (+)               Higher prices → More transfers
## 2 Negative (-) More conditional discounts → Fewer transfers
## 3 Positive (+)            Longer contracts → More transfers
## 4 Negative (-)                              See coefficient
## 5 Positive (+)             More incentives → More transfers
## 6 Positive (+)            Higher late fees → More transfers
## 7 Positive (+)                              See coefficient
## 
## Model Fit:
##   R-squared: NaN (NaN%)
##   Adjusted R-squared: NaN

PLOT: LINEAR REGRESSION COEFFICIENT PLOT

MULTICOLLINEARITY CHECK (VIF)

## 
## Variance Inflation Factors (VIF):
## Rules: VIF < 5 = OK, VIF 5-10 = Moderate concern, VIF > 10 = High multicollinearity
## 
##            price_vs_market price_competitiveness_rank 
##                        NaN                        NaN 
##    pct_guaranteed_discount   pct_conditional_discount 
##                        NaN                        NaN 
##          avg_contract_term        pct_solar_available 
##                        NaN                        NaN 
##         pct_with_incentive             avg_annual_fee 
##                        NaN                        NaN 
##               avg_late_fee          log_customer_size 
##                        NaN                        NaN 
## 
## ⚠️  Variables with multicollinearity concerns:
## <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> 
##   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA 
## 
## Consider removing one of the correlated variables or using Ridge/Lasso regression.

POLYNOMIAL REGRESSION (NON-LINEAR)

## 
## Call:
## lm(formula = total_transfers_out ~ price_vs_market + I(price_vs_market^2) + 
##     pct_guaranteed_discount + I(pct_guaranteed_discount^2) + 
##     pct_conditional_discount + I(pct_conditional_discount^2) + 
##     avg_contract_term + I(avg_contract_term^2) + pct_with_incentive + 
##     I(pct_with_incentive^2) + avg_annual_fee + I(avg_annual_fee^2) + 
##     avg_late_fee + log_customer_size, data = regression_data)
## 
## Residuals:
##         Min          1Q      Median          3Q         Max 
## -4.570e-321 -1.112e-321 -1.877e-322  6.571e-322  1.656e-320 
## 
## Coefficients:
##                                  Estimate  Std. Error t value Pr(>|t|)    
## (Intercept)                    6.826e-319   0.000e+00     Inf   <2e-16 ***
## price_vs_market                9.881e-324   0.000e+00     Inf   <2e-16 ***
## I(price_vs_market^2)            0.000e+00   0.000e+00     NaN      NaN    
## pct_guaranteed_discount        4.941e-324   0.000e+00     Inf   <2e-16 ***
## I(pct_guaranteed_discount^2)    0.000e+00   0.000e+00     NaN      NaN    
## pct_conditional_discount        0.000e+00   0.000e+00     NaN      NaN    
## I(pct_conditional_discount^2)   0.000e+00   0.000e+00     NaN      NaN    
## avg_contract_term              1.112e-321   0.000e+00     Inf   <2e-16 ***
## I(avg_contract_term^2)        -5.138e-322   0.000e+00    -Inf   <2e-16 ***
## pct_with_incentive             4.941e-324   0.000e+00     Inf   <2e-16 ***
## I(pct_with_incentive^2)         0.000e+00   0.000e+00     NaN      NaN    
## avg_annual_fee                  0.000e+00   0.000e+00     NaN      NaN    
## I(avg_annual_fee^2)             0.000e+00   0.000e+00     NaN      NaN    
## avg_late_fee                   3.458e-323   0.000e+00     Inf   <2e-16 ***
## log_customer_size              9.288e-322   0.000e+00     Inf   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0 on 20102 degrees of freedom
## Multiple R-squared:    NaN,  Adjusted R-squared:    NaN 
## F-statistic:   NaN on 14 and 20102 DF,  p-value: NA

INTERACTION EFFECTS

## 
## Call:
## lm(formula = total_transfers_out ~ price_vs_market * pct_conditional_discount + 
##     price_vs_market * avg_contract_term + pct_with_incentive * 
##     avg_contract_term + pct_guaranteed_discount * pct_with_incentive + 
##     log_customer_size * price_vs_market, data = regression_data)
## 
## Residuals:
##         Min          1Q      Median          3Q         Max 
## -4.634e-321 -1.062e-321 -2.124e-322  6.818e-322  1.654e-320 
## 
## Coefficients:
##                                               Estimate  Std. Error t value
## (Intercept)                                 7.007e-319   0.000e+00     Inf
## price_vs_market                            -3.617e-321   0.000e+00    -Inf
## pct_conditional_discount                     0.000e+00   0.000e+00     NaN
## avg_contract_term                           6.620e-322   0.000e+00     Inf
## pct_with_incentive                          4.941e-324   0.000e+00     Inf
## pct_guaranteed_discount                     4.941e-324   0.000e+00     Inf
## log_customer_size                           9.535e-322   0.000e+00     Inf
## price_vs_market:pct_conditional_discount     0.000e+00   0.000e+00     NaN
## price_vs_market:avg_contract_term           1.482e-323   0.000e+00     Inf
## avg_contract_term:pct_with_incentive         0.000e+00   0.000e+00     NaN
## pct_with_incentive:pct_guaranteed_discount   0.000e+00   0.000e+00     NaN
## price_vs_market:log_customer_size          -4.941e-324   0.000e+00    -Inf
##                                            Pr(>|t|)    
## (Intercept)                                  <2e-16 ***
## price_vs_market                              <2e-16 ***
## pct_conditional_discount                        NaN    
## avg_contract_term                            <2e-16 ***
## pct_with_incentive                           <2e-16 ***
## pct_guaranteed_discount                      <2e-16 ***
## log_customer_size                            <2e-16 ***
## price_vs_market:pct_conditional_discount        NaN    
## price_vs_market:avg_contract_term            <2e-16 ***
## avg_contract_term:pct_with_incentive            NaN    
## pct_with_incentive:pct_guaranteed_discount      NaN    
## price_vs_market:log_customer_size            <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0 on 20105 degrees of freedom
## Multiple R-squared:    NaN,  Adjusted R-squared:    NaN 
## F-statistic:   NaN on 11 and 20105 DF,  p-value: NA

RANDOM FOREST

PLOT: RANDOM FOREST VARIABLE IMPORTANCE

XGBOOST

PLOT: XGBOOST VARIABLE IMPORTANCE

FINAL MODEL COMPARISON

##                   Model R_Squared
## 1               XGBoost      -Inf
## 2     Linear Regression       NaN
## 3 Polynomial Regression       NaN
## 4     Interaction Model       NaN
## 5         Random Forest       NaN

ANALYSIS 3: Variable Analysis

4A. CONTRACT TERM EFFECTS

## # A tibble: 3 × 4
##   contract_category     avg_transfer_rate median_transfer_rate n_weeks
##   <chr>                             <dbl>                <dbl>   <int>
## 1 1-year typical                    0.200                0.182    1731
## 2 No fixed term                     0.373                0.268   17080
## 3 Short-term / flexible             0.221                0.202    1306

4B. SOLAR AVAILABILITY EFFECTS

## # A tibble: 2 × 3
##   solar_category               avg_transfer_rate n_weeks
##   <chr>                                    <dbl>   <int>
## 1 Mostly solar-friendly offers             0.348   20081
## 2 No solar options                         0.240      36

4C. INCENTIVES EFFECTS

## # A tibble: 3 × 3
##   incentive_category             avg_transfer_rate n_weeks
##   <chr>                                      <dbl>   <int>
## 1 Most offers include incentives             0.300   11776
## 2 No incentives                              0.474    5682
## 3 Some incentives                            0.292    2659

SIMPLE SCATTER PLOTS: TRANSFER RATE vs FEES